import pandas as pd
# Load worksheet "Sheet1" of the Excel workbook into a DataFrame.
df = pd.read_excel("测试.xlsx", sheet_name="Sheet1", engine="openpyxl")

# Show the first ten rows, then the type of the loaded object.
print(df.head(n=10), type(df), sep="\n")

# Build a small demo DataFrame from a column-name -> values mapping.
zidian = {
    "身高": [123, 232, 32],
    "体重": [45, 46, 77],
    "年龄": [12, 13, 13],
}
zidianf = pd.DataFrame(data=zidian)
print(zidianf)

# --- Basic information ---
# Preview the first rows of the table (head() defaults to five rows).
print(df.head())
# Column names, non-null counts and dtypes. info() writes its report to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would append a stray "None" line.
df.info()

# --- Missing-value handling ---
print(df.head())
# Drop every row that contains at least one missing value.
df = df.dropna()
print(df)
# Summarise the cleaned frame. Note the call parentheses: printing
# `df.info` without calling it would show the bound-method repr instead
# of the summary.
df.info()
# --- Data type conversion ---
# Cast the "体重_num" column to integers when it is present; otherwise
# report that it is missing and carry on.
if "体重_num" in df.columns:
    df["体重_num"] = df["体重_num"].astype(int)
else:
    print("The '体重_num' column does not exist in the DataFrame.")
# info() prints its own report and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
df.info()
# --- Selection and filtering ---
def _filter_three_sigma(frame, n_sigma=3):
    """Return the rows of *frame* that lie within n_sigma std of every numeric column's mean.

    Mean and standard deviation are computed once per numeric column from
    the unfiltered *frame*, and all per-column conditions are combined into
    a single row mask. The result therefore does not depend on column
    order, and one column's outliers do not shift the bounds used for the
    next column.

    Columns whose standard deviation is undefined (NaN, e.g. fewer than two
    rows) are skipped: comparing against NaN bounds is always False and
    would otherwise discard every row.
    """
    keep = pd.Series(True, index=frame.index, dtype=bool)
    for col in frame.select_dtypes(include=["number"]).columns:
        values = frame[col]
        std = values.std()
        if pd.isna(std):
            # No usable spread estimate -> this column cannot flag outliers.
            continue
        mean = values.mean()
        lower_bound = mean - n_sigma * std
        upper_bound = mean + n_sigma * std
        keep = keep & (values >= lower_bound) & (values <= upper_bound)
    return frame[keep]


df = _filter_three_sigma(df)

print("Data after cleaning using the Three - Sigma Rule:")
print(df)



